clear all 
set more off 
set maxvar 15000 
clear matrix

**********************************************************************************************************************************************
**********************************************************************************************************************************************

	use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear 
    keep if baseline_sample==1 
    
    * Set up time periods
    gen early = decade==1910 | decade==1920
    gen late = decade==1940 | decade==1950
    keep if early==1 | late==1
    tab decade
	
	* Residualized decade FE
	gen yhat_fam= .
	gen yhat_father= .
	
	foreach name in early late { 
	forval r=1/2 { 
	
		quietly reg log_son_baseline i.decade if `name'==1 & race==`r'  [aw=wgt_sex_race]
					predict yhat_fam`name'`r' if `name'==1 & race==`r' , residuals
					sum log_son_baseline if `name'==1 & race==`r'  [aw=wgt_sex_race]
					replace yhat_fam = yhat_fam`name'`r' + `r(mean)' if `name'==1 & race==`r' 
			
		quietly reg log_father_baseline i.decade if `name'==1 & race==`r' [aw=wgt_sex_race]
					predict yhat_father`name'`r' if `name'==1 & race==`r' , residuals
					sum log_father_baseline if `name'==1 & race==`r' [aw=wgt_sex_race]
					replace yhat_father = yhat_father`name'`r' + `r(mean)' if `name'==1 & race==`r' 
	}
	}
	
	replace log_son_baseline = yhat_fam 
	replace log_father_baseline = yhat_father	
            
                   
        local dep "log_son_baseline"
        local indep "log_father_baseline"
        
    * Define groups
        gen group = .
        replace group=1 if race==1 
        replace group=2 if race==2
        tab group, m
        drop if group==.
        
        tempfile fulldata
        save `fulldata'
        
    * Full population IGE: early period
        reg `dep' `indep' if early==1 [pw=wgt_sex_race] , robust 
        local beta_actual_early = _b[`indep']
        display `beta_actual_early'
        
    * Full population IGE: late period
        reg `dep' `indep' if late==1 [pw=wgt_sex_race] , robust 
        local beta_actual_late = _b[`indep']
        display `beta_actual_late'
        
    * Set up parameters for simulation 
    foreach x in early late {

        use `fulldata', clear 
        keep if `x'==1
        
        * Beta by group
        forval i=1(1)2 {
            reg `dep' `indep' [pw=wgt_sex_race] if group==`i', robust 
            local beta_`i'_`x' = _b[`indep']
        }
        
        * Population share
        tab group, gen(group_)
        forval i=1(1)2 {
            sum group_`i' [aw=wgt_sex_race]
            local p_`i'_`x' = r(mean)
        }
        
        * Variance terms
        sum `indep'  [aw=wgt_sex_race]
        local tot_var_`x' = r(Var)
        forval i=1(1)2 {
            sum `indep'   [aw=wgt_sex_race] if group==`i'
            local tot_var_`i'_`x' = r(Var)
        }

        * Expected values of y
        sum `dep'  [aw=wgt_sex_race]
        local avg_y_`x' = r(mean)
        forval i=1(1)2 {
            sum `dep'  [aw=wgt_sex_race] if group==`i'
            local avg_y_`i'_`x' = r(mean)
        }

        * Expected values of x
        sum `indep'  [aw=wgt_sex_race]
        local avg_x_`x' = r(mean)
        forval i=1(1)2 {
            sum `indep'  [aw=wgt_sex_race] if group==`i'
            local avg_x_`i'_`x' = r(mean)
        }

        * Put together formula
        forval i=1(1)2 {
            local term_`i'_`x' = (`p_`i'_`x''*(`tot_var_`i'_`x'' / `tot_var_`x'') * `beta_`i'_`x'') 
            display `term_`i'_`x''
        }
        
        loca numerator_`x' = ((`p_1_`x''*`avg_x_1_`x''*`avg_y_1_`x'')+(`p_2_`x''*`avg_x_2_`x''*`avg_y_2_`x'') -(`avg_x_`x''*`avg_y_`x''))
        display `numerator_`x''
        display `tot_var_`x''
        local last_term_`x' = `numerator_`x'' / (`tot_var_`x'')
        display `last_term_`x''
        
        local beta_test_`x' = `term_1_`x'' + `term_2_`x'' + `last_term_`x'' 
                
    }
        
    * Display two terms that should be the same
        display `beta_test_early'
        display `beta_actual_early'
        
        display `beta_test_late'
        display `beta_actual_late'

        
*------------*   
* SIMULATE 
*------------*   

        use `fulldata', clear
        
    * 1. Lower the degree of catch-up between black/white 
        local dep "log_son_baseline"
        local indep "log_father_baseline"
        
        display `last_term_early' 
        display `last_term_late' 
        
        //Changing the incomes of Black respondents 
        clonevar ynew = log_son_baseline
        
        sum ynew  [aw=wgt_sex_race] if group==2 & late==0
        local black_mean_early = `r(mean)'
        sum ynew  [aw=wgt_sex_race] if group==2 & late==1
        local black_mean_late = `r(mean)'
		
        sum ynew  [aw=wgt_sex_race] if group==1 & late==1
        local white_mean_late = `r(mean)'
        sum ynew   [aw=wgt_sex_race] if group==1 & late==0
        local white_mean_early = `r(mean)'
        
        replace ynew = log_son_baseline -(`black_mean_late' - `black_mean_early') + (`white_mean_late' - `white_mean_early')  if group==2 & late==1
     

    * Third-term       
        sum log_son_baseline if group==2 & late==1 [aw=wgt_sex_race]
        sum ynew if group==2 & late==1 [aw=wgt_sex_race]
        local avg_new_b = `r(mean)'
        
        sum log_son_baseline if late==1 [aw=wgt_sex_race]
        sum ynew if late==1 [aw=wgt_sex_race] 
        local avg_new_all = `r(mean)'
        local sd_new_all = `r(sd)'
        
        local numerator = ((`p_1_late'*`avg_x_1_late'*`avg_y_1_late')+(`p_2_late'*`avg_x_2_late'*`avg_y_2_late') -(`avg_x_late'*`avg_y_late'))
        local last_term_old = `numerator' / (`tot_var_late')
        display `last_term_old' 
        
        local numerator = ((`p_1_late'*`avg_x_1_late'*`avg_y_1_late')+(`p_2_late'*`avg_x_2_late'*`avg_new_b') -(`avg_x_late'*`avg_new_all'))
        local last_term_new = `numerator' / (`tot_var_late')
        display `last_term_new' //bigger last term than before because less convergence
        
        reg `dep' `indep' [pw=wgt_sex_race] if early==1 & group==1, robust //0.58
        reg `dep' `indep' [pw=wgt_sex_race] if late==1 & group==1, robust  //0.25
        reg ynew `indep' [pw=wgt_sex_race] if late==1 & group==1, robust   //0.25 - hasn't changed by design
        
        reg `dep' `indep' [pw=wgt_sex_race] if late==1 & group==2, robust //0.12
        reg ynew `indep' [pw=wgt_sex_race] if late==1 & group==2, robust //0.12
        
        reg `dep' `indep' [pw=wgt_sex_race] if early==1, robust  //0.65 
        reg `dep' `indep' [pw=wgt_sex_race] if late==1, robust  //0.34
        reg ynew `indep' [pw=wgt_sex_race] if late==1, robust  //0.40
        
        reg ynew `indep' [pw=wgt_sex_race] if late==1, robust beta 
        

    * 2.Changing white slope only (adjusting mean, adding back in residuals)
        reg log_son_baseline log_father_baseline [pw=wgt_sex_race] if early==1 & group ==1, robust
        local early_slope = _b[log_father_baseline]     
            
        reg log_son_baseline log_father_baseline [pw=wgt_sex_race] if late==1 & group ==1, robust
        predict res, residuals
                
        sum log_father_baseline [aw=wgt_sex_race] if late==1 & group ==1
        local late_father_mean = `r(mean)'

        sum log_son_baseline [aw=wgt_sex_race] if late==1 & group ==1
        local late_son_mean = `r(mean)'
        
        clonevar ynew2 = log_son_baseline       
        replace ynew2= `late_son_mean'- (`early_slope'*`late_father_mean') + (`early_slope'*log_father_baseline) + res if late==1 & group ==1
        
        reg log_son_baseline log_father_baseline [pw=wgt_sex_race] if early==1 & group ==1, robust      
        reg log_son_baseline log_father_baseline [pw=wgt_sex_race] if late==1 & group ==1, robust
        reg ynew2 log_father_baseline [pw=wgt_sex_race] if late==1 & group ==1, robust //later period but same slope as early
        
        sum log_son_baseline [aw=wgt_sex_race] if late==1 & group ==1 
        sum ynew2 [aw=wgt_sex_race] if late==1 & group ==1 
        local avg_new_w = `r(mean)'
        
        sum log_son_baseline if group==2 & late==1 [aw=wgt_sex_race]
        sum ynew2 if group==2 & late==1 [aw=wgt_sex_race] //Note: unchanged for black respondents
        local avg_new_b = `r(mean)'
        
        sum log_son_baseline if late==1 [aw=wgt_sex_race]
        sum ynew2 if late==1 [aw=wgt_sex_race] //Note: unchanged for full group
        local avg_new_all = `r(mean)'
        
        local numerator = ((`p_1_late'*`avg_x_1_late'*`avg_y_1_late')+(`p_2_late'*`avg_x_2_late'*`avg_y_2_late') -(`avg_x_late'*`avg_y_late'))
        local last_term_old = `numerator' / (`tot_var_late')
        display `last_term_old'     
        
        local numerator = ((`p_1_late'*`avg_x_1_late'*`avg_new_w')+(`p_2_late'*`avg_x_2_late'*`avg_new_b') -(`avg_x_late'*`avg_new_all'))
        local last_term_new = `numerator' / (`tot_var_late')
        display `last_term_new' //Note: third term hasn't changed
        
        
        reg `dep' `indep' [pw=wgt_sex_race] if early==1 & group==1, robust 
        reg `dep' `indep' [pw=wgt_sex_race] if late==1 & group==1, robust 
        reg ynew2 `indep' [pw=wgt_sex_race] if late==1 & group==1, robust 
        
        reg `dep' `indep' [pw=wgt_sex_race] if late==1 & group==2, robust 
        reg ynew2 `indep' [pw=wgt_sex_race] if late==1 & group==2, robust 
        
        reg `dep' `indep' [pw=wgt_sex_race] if late==1, robust 
        reg ynew2 `indep' [pw=wgt_sex_race] if late==1, robust 
        
        
        * Assess IGC
        reg `dep' `indep' [pw=wgt_sex_race] if early==1, robust beta 
        reg `dep' `indep' [pw=wgt_sex_race] if late==1, robust beta 
        reg ynew2 `indep' [pw=wgt_sex_race] if late==1, robust beta          
        //Note: The step above doesn't change the rankings. 

        egen rank_father = xtile(`indep'), by(dob) nq(100) weight(wgt_sex_race)
        egen rank_son = xtile(`dep'), by(dob) nq(100) weight(wgt_sex_race)
        egen rank_son_new = xtile(ynew), by(dob) nq(100) weight(wgt_sex_race)
        egen rank_son_new2 = xtile(ynew2), by(dob) nq(100) weight(wgt_sex_race)

        reg rank_son rank_father if early==1  [pw=wgt_sex_race]  
        reg rank_son rank_father if late==1  [pw=wgt_sex_race] 
        /****third term***/
        reg rank_son_new rank_father if late==1 [pw=wgt_sex_race] 
        /****white slope***/
        reg rank_son_new2 rank_father if late==1 [pw=wgt_sex_race] 
        
        
* Save (for graphing later)

    gen cat=_n
    gen sim=.
    
    * Measures -- early 
    reg log_son_baseline log_father_baseline if early==1 [pw=wgt_sex_race]
    replace sim = _b[log_father_baseline] if cat==1
    
    reg rank_son rank_father if early==1 [pw=wgt_sex_race]
    replace sim = _b[rank_father] if cat==2 
    
    sum log_father_baseline  if early==1 [aw=wgt_sex_race]
    local sig_x = `r(sd)'
    
    sum log_son_baseline if early==1 [aw=wgt_sex_race]
    local sig_y = `r(sd)'
    
    local ratio1 = `sig_x'/`sig_y'
    
    reg log_son_baseline log_father_baseline if early==1 [pw=wgt_sex_race]      
        lincom log_father_baseline * `ratio1'
        replace sim = `r(estimate)' if cat==3   
        

    * Simulation 1 
    reg ynew log_father_baseline if late==1 [pw=wgt_sex_race]
    replace sim = _b[log_father_baseline] if cat==5
    
    reg rank_son_new rank_father if late==1 [pw=wgt_sex_race]
    replace sim = _b[rank_father] if cat==6 
    
    sum log_father_baseline  if late==1 [aw=wgt_sex_race]
    local sig_x = `r(sd)'
    
    sum ynew if late==1 [aw=wgt_sex_race]
    local sig_y = `r(sd)'
    
    local ratio1 = `sig_x'/`sig_y'
    
    reg ynew log_father_baseline if late==1 [pw=wgt_sex_race]       
        lincom log_father_baseline * `ratio1'
        replace sim = `r(estimate)' if cat==7       
        

    * Simulation 2 
    reg ynew2 log_father_baseline if late==1 [pw=wgt_sex_race]
    replace sim = _b[log_father_baseline] if cat==9
    
    reg rank_son_new2 rank_father if late==1 [pw=wgt_sex_race]
    replace sim = _b[rank_father] if cat==10
    
    sum log_father_baseline  if late==1 [aw=wgt_sex_race]
    local sig_x = `r(sd)'
    
    sum ynew2 if late==1 [aw=wgt_sex_race]
    local sig_y = `r(sd)'
    
    local ratio1 = `sig_x'/`sig_y'
    
    reg ynew2 log_father_baseline if late==1 [pw=wgt_sex_race]      
        lincom log_father_baseline * `ratio1'
        replace sim = `r(estimate)' if cat==11      
        
        
    * Measures -- Late
    reg log_son_baseline log_father_baseline if late==1 [pw=wgt_sex_race]
    replace sim = _b[log_father_baseline] if cat==13
    
    reg rank_son rank_father if late==1 [pw=wgt_sex_race]
    replace sim = _b[rank_father] if cat==14
    
    sum log_father_baseline  if late==1 [aw=wgt_sex_race]
    local sig_x = `r(sd)'
    
    sum log_son_baseline if late==1 [aw=wgt_sex_race]
    local sig_y = `r(sd)'
    
    local ratio1 = `sig_x'/`sig_y'
    
    reg log_son_baseline log_father_baseline if late==1 [pw=wgt_sex_race]       
        lincom log_father_baseline * `ratio1'
        replace sim = `r(estimate)' if cat==15      
    
    
    foreach x in 1 2 3 5 6 7 9 10 11 13 14 15 {
        quietly sum sim if cat==`x'
        local number_`x': display %4.2f `r(mean)'
    }
    
    
    #delimit ;
        twoway (bar sim cat if cat==1, yaxis(1) fcolor(navy*0.5) lcolor(navy))
               (bar sim cat if cat==2, yaxis(2) fcolor(teal*0.5) lcolor(teal))
               (bar sim cat if cat==3, yaxis(2) fcolor(erose*0.5) lcolor(erose))
            
               (bar sim cat if cat==5, yaxis(1) fcolor(navy*0.5) lcolor(navy))
               (bar sim cat if cat==6, yaxis(2) fcolor(teal*0.5) lcolor(teal)) 
               (bar sim cat if cat==7, yaxis(2) fcolor(erose*0.5) lcolor(erose))   
            
               (bar sim cat if cat==9, yaxis(1) fcolor(navy*0.5) lcolor(navy))
               (bar sim cat if cat==10, yaxis(2) fcolor(teal*0.5) lcolor(teal))    
               (bar sim cat if cat==11, yaxis(2) fcolor(erose*0.5) lcolor(erose))      
            
               (bar sim cat if cat==13, yaxis(1) fcolor(navy*0.5) lcolor(navy))
               (bar sim cat if cat==14, yaxis(2) fcolor(teal*0.5) lcolor(teal))    
               (bar sim cat if cat==15, yaxis(2) fcolor(erose*0.5) lcolor(erose))      
        ,
            ylabel(0.25(.1).85, axis(1)) yti("IGE coefficient" " ", axis(1)) ylabel(0.15(.1).45, axis(2)) yti(" " "Rank and IGC", axis(2))
            xti("") xlabel(2 "1910-1929" 6 `" "No Black-white" "catch up" "' 10 `" "No white" "slope decline" "' 14 "1940-1959") legend(on rows(1) ring(0) pos(1) order(1 "IGE" 5 "Rank" 6 "IGC"))
        
            xsize(10) ysize(5.5)
            
            text(`number_1' 1 "`number_1'" " ", alignment(top) placement(north) size(small)) 
            text(`number_5' 5 "`number_5'" " ", alignment(top) placement(north) size(small)) 
            text(`number_9' 9 "`number_9'" " ", alignment(top) placement(north) size(small)) 
            text(`number_13' 13 "`number_13'" " ", alignment(top) placement(north) size(small)) 

            text(0.61 2 "`number_2'" " ", alignment(top) placement(north) size(small)) 
            text(0.67 3 "`number_3'" " ", alignment(top) placement(north) size(small)) 
            
            text(0.49 6 "`number_6'" " ", alignment(top) placement(north) size(small)) 
            text(0.47 7 "`number_7'" " ", alignment(top) placement(north) size(small))
            
            text(0.55 10 "`number_10'" " ", alignment(top) placement(north) size(small)) 
            text(0.49 11 "`number_11'" " ", alignment(top) placement(north) size(small))  
            
            text(0.44 14 "`number_14'" " ", alignment(top) placement(north) size(small))       
            text(0.41 15 "`number_15'" " ", alignment(top) placement(north) size(small))       
        ;
    #delimit cr
        graph export "$Mydirectory2/main_figures_tables/figure7.pdf", replace  
    
    